Preliminaries¶
Software versions and configurations¶
pip install dimcat Jinja2 colorlover GitPython plotly
import os
from git import Repo
import dimcat as dc
from ms3 import __version__ as ms3_version
# Location of the git-tracked data repository; the corpus folders loaded further below are sub-folders of it.
dataset_path = "~/all_subcorpora"
repo = Repo(dataset_path)
# Find the git repository containing this notebook by searching the parent directories of the working directory.
notebook_repo = Repo('.', search_parent_directories=True)
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")
# Print abbreviated (7-character) commit hashes and the library versions so that results can be reproduced.
print(f"Notebook repository '{os.path.basename(notebook_repo_path)}' @ {notebook_repo.commit().hexsha[:7]}")
print(f"Data repo '{os.path.basename(dataset_path)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3_version}")
Notebook repository 'dimcat' @ 36fcf12
Data repo 'all_subcorpora' @ f549aa9
dimcat version 0.2.0.post1.dev64+gda0a036
ms3 version 1.0.2
# IPython magics: enable the autoreload extension in mode 2 so that edited modules are re-imported automatically.
%load_ext autoreload
%autoreload 2
from collections import defaultdict, Counter
from fractions import Fraction
from IPython.display import HTML
import ms3
import plotly.express as px
import colorlover
import pandas as pd
import numpy as np
# Allow DataFrames to be displayed with up to 100 columns and 500 rows.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Restrict matplotlib's log output to the 'error' level.
plt.set_loglevel('error')
This JavaScript adds a “Toggle Code” button that shows or hides the code of every cell, as per http://www.eointravers.com/post/jupyter-toggle/
# Render a "Show Code"/"Hide Code" button. The embedded script (which relies on jQuery's `$`)
# toggles the visibility of all `div.input` elements and hides them once the document is ready.
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Layout settings shared by the Plotly figures in this notebook (applied via
# `fig.update_layout(**STD_LAYOUT)`): white paper and plot backgrounds, small
# margins that leave room only on the left and at the top, and a font size of 15.
STD_LAYOUT = dict(
    paper_bgcolor='#FFFFFF',
    plot_bgcolor='#FFFFFF',
    margin=dict(l=40, r=0, b=0, t=40, pad=0),
    font=dict(size=15),
)
import colorlover
#for name, scales in colorlover.scales['6']['qual'].items():
# print(name)
# display(HTML(colorlover.to_html(scales)))
# Pair each cadence label with one colour of colorlover's qualitative 'Set1' scale
# (taken from the group of 6-colour scales), in the order given here.
cadence_colors = {
    cadence: colour
    for cadence, colour in zip(
        ('HC', 'PAC', 'PC', 'IAC', 'DC', 'EC'),
        colorlover.scales['6']['qual']['Set1'],
    )
}
def value_count_df(S, thing=None, counts='counts'):
    """Return the value counts of a Series as a one-column DataFrame.

    Args:
        S: The pandas Series whose values are counted.
        thing: Name given to the resulting index. Defaults to the name of ``S``.
        counts: Name of the single column holding the counts.

    Returns:
        A DataFrame indexed by the distinct values of ``S`` (index named
        ``thing``) with one column named ``counts``.
    """
    if thing is None:
        thing = S.name
    frequencies = S.value_counts()
    table = frequencies.rename(counts).to_frame()
    table.index = table.index.rename(thing)
    return table
def color_background(x, color="#ffffb3"):
    """Build CSS background styles for the non-missing entries of ``x``.

    Intended as a callback for ``Styler.apply``.

    Args:
        x: A pandas object offering ``notna()`` (e.g. a Series).
        color: CSS colour used for the background of non-missing entries.

    Returns:
        A NumPy array shaped like ``x`` holding ``"background-color: <color>;"``
        where ``x`` has a value and ``None`` where it is missing.
    """
    css = f"background-color: {color};"
    has_value = x.notna().to_numpy()
    return np.where(has_value, css, None)
Data loading¶
# Create an empty dimcat Dataset and load each corpus folder found below `dataset_path` into it.
dataset = dc.Dataset()
for folder in [
    'bach_solo',
    'beethoven_piano_sonatas',
    'c_schumann_lieder',
    'chopin_mazurkas',
    'corelli',
    'debussy_suite_bergamasque',
    'dvorak_silhouettes',
    'grieg_lyrical_pieces',
    'handel_keyboard',
    'jc_bach_sonatas',
    'liszt_pelerinage',
    'mahler_kindertotenlieder',
    'medtner_tales',
    'pleyel_quartets',
    'scarlatti_sonatas',
    'schubert_dances',
    'schumann_kinderszenen',
    'tchaikovsky_seasons',
    'wf_bach_sonatas',
]:
    # Progress message, one line per corpus.
    print("Loading", folder)
    path = os.path.join(dataset_path, folder)
    dataset.load(directory=path)
Loading bach_solo
Loading beethoven_piano_sonatas
Loading c_schumann_lieder
Loading chopin_mazurkas
Loading corelli
Loading debussy_suite_bergamasque
Loading dvorak_silhouettes
Loading grieg_lyrical_pieces
Loading handel_keyboard
Loading jc_bach_sonatas
Loading liszt_pelerinage
Loading mahler_kindertotenlieder
Loading medtner_tales
Loading pleyel_quartets
Loading scarlatti_sonatas
Loading schubert_dances
Loading schumann_kinderszenen
Loading tchaikovsky_seasons
Loading wf_bach_sonatas
# Display the Dataset's data object; its representation is an overview of the loaded corpora and their files.
dataset.data
[default|all]
All corpora
-----------
View: This view is called 'default'. It
- excludes fnames that are not contained in the metadata,
- filters out file extensions requiring conversion (such as .xml), and
- excludes review files and folders.
has active scores measures notes expanded
metadata view detected detected parsed detected parsed detected parsed
corpus
bach_solo yes default 68 68 68 68 68 68 68
beethoven_piano_sonatas yes default 87 87 87 87 87 64 64
c_schumann_lieder yes default 12 12 12 12 12 12 12
chopin_mazurkas yes default 55 55 55 55 55 55 55
corelli yes default 149 149 149 149 149 149 149
debussy_suite_bergamasque yes default 4 4 4 4 4 4 4
dvorak_silhouettes yes default 12 12 12 12 12 12 12
grieg_lyrical_pieces yes default 66 66 66 66 66 66 66
handel_keyboard yes default 6 6 6 6 6 6 6
jc_bach_sonatas yes default 29 29 29 29 29 29 29
liszt_pelerinage yes default 19 19 19 19 19 19 19
mahler_kindertotenlieder yes default 5 5 5 5 5 5 5
medtner_tales yes default 19 19 19 19 19 19 19
pleyel_quartets yes default 6 6 6 6 6 6 6
scarlatti_sonatas yes default 69 69 69 69 69 69 69
schubert_dances yes default 444 444 444 444 444 16 16
schumann_kinderszenen yes default 13 13 13 13 13 13 13
tchaikovsky_seasons yes default 12 12 12 12 12 12 12
wf_bach_sonatas yes default 9 9 9 9 9 9 9
2065/7034 files are excluded from this view.
1810 files have been excluded based on their subdir.
255 files have been excluded based on their file name.
There are 1 orphans that could not be attributed to any of the respective corpus's fnames.
#dataset = Corpus(directory=dataset_path)
#dataset.data
Filtering out pieces without cadence annotations¶
# Apply dimcat's HasCadenceAnnotationsFilter to the loaded dataset.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
# Report the number of pieces before and after filtering as an HTML heading.
display(HTML(f"<h4>Before: {dataset.n_indices} pieces; "
f"after removing those without cadence labels: {hascadence.n_indices}</h4>"))
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 96) ms3.Parse.beethoven_piano_sonatas.04-3 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 96 (timesig 3/4, act_dur 1/4) is completed by 1 incorrect duration (expected: 1/2):
{97: Fraction(3, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 112) ms3.Parse.beethoven_piano_sonatas.04-3 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 112 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{25: Fraction(3, 4), 113: Fraction(1, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 39) ms3.Parse.beethoven_piano_sonatas.13-1 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 39 (timesig 3/4, act_dur 1/8) is completed by 1 incorrect duration (expected: 5/8):
{40: Fraction(3, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 47) ms3.Parse.beethoven_piano_sonatas.13-1 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 47 (timesig 3/4, act_dur 5/8) is completed by 1 incorrect duration (expected: 1/8):
{14: Fraction(1, 1), 48: Fraction(1, 8)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 267) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 267 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{268: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 269) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 269 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{270: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 271) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 271 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{272: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 273) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 273 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{274: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 275) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 275 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{276: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 277) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 277 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{278: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 279) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 279 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{280: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 281) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 281 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{282: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 283) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 283 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{284: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 285) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
The incomplete MC 285 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
{286: Fraction(1, 2)}
MCS_NOT_EXCLUDED_FROM_BARCOUNT_WARNING (1, 268, 270, 272, 274, 276, 278, 280, 282, 284, 286) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 204) check_measure_numbers():
MCs 268, 270, 272, 274, 276, 278, 280, 282, 284, 286 seem to be offset from the MN's beginning but have not been excluded from barcount. Context:
mc mn act_dur mc_offset dont_count numbering_offset
266 267 266 1/2 0 <NA> <NA>
267 268 267 1/2 1/4 <NA> <NA>
268 269 268 1/2 0 <NA> <NA>
269 270 269 1/2 1/4 <NA> <NA>
270 271 270 1/2 0 <NA> <NA>
271 272 271 1/2 1/4 <NA> <NA>
272 273 272 1/2 0 <NA> <NA>
273 274 273 1/2 1/4 <NA> <NA>
274 275 274 1/2 0 <NA> <NA>
275 276 275 1/2 1/4 <NA> <NA>
276 277 276 1/2 0 <NA> <NA>
277 278 277 1/2 1/4 <NA> <NA>
278 279 278 1/2 0 <NA> <NA>
279 280 279 1/2 1/4 <NA> <NA>
280 281 280 1/2 0 <NA> <NA>
281 282 281 1/2 1/4 <NA> <NA>
282 283 282 1/2 0 <NA> <NA>
283 284 283 1/2 1/4 <NA> <NA>
284 285 284 1/2 0 <NA> <NA>
285 286 285 1/2 1/4 <NA> <NA>
UNUSED_FINE_MARKER_WARNING (20, 19) ms3.Parse.schubert_dances.D735galopp01a -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 266) __init__():
Piece has a Fine but the last MC is missing a repeat sign or a D.C. (da capo) or D.S. (dal segno). Ignoring Fine.
Before: 1084 pieces; after removing those without cadence labels: 587
Show corpora containing pieces with cadence annotations¶
# Group the filtered pieces by corpus.
grouped_by_dataset = dc.CorpusGrouper().process_data(hascadence)
# Map the first element of each group key to a "<n> pieces" summary string.
corpora = {group[0]: f"{len(ixs)} pieces" for group, ixs in grouped_by_dataset.indices.items()}
# Print the number of groups and the total number of pieces across all groups.
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_dataset.indices.values()))} pieces containing cadence annotations:")
corpora
19 corpora with 587 pieces containing cadence annotations:
{'bach_solo': '32 pieces',
'beethoven_piano_sonatas': '64 pieces',
'c_schumann_lieder': '12 pieces',
'chopin_mazurkas': '50 pieces',
'corelli': '148 pieces',
'debussy_suite_bergamasque': '4 pieces',
'dvorak_silhouettes': '12 pieces',
'grieg_lyrical_pieces': '65 pieces',
'handel_keyboard': '6 pieces',
'jc_bach_sonatas': '29 pieces',
'liszt_pelerinage': '19 pieces',
'mahler_kindertotenlieder': '5 pieces',
'medtner_tales': '16 pieces',
'pleyel_quartets': '6 pieces',
'scarlatti_sonatas': '69 pieces',
'schubert_dances': '16 pieces',
'schumann_kinderszenen': '13 pieces',
'tchaikovsky_seasons': '12 pieces',
'wf_bach_sonatas': '9 pieces'}
All annotation labels from the selected pieces¶
# Retrieve the 'expanded' facet (the annotation labels) of the filtered pieces.
all_labels = hascadence.get_facet('expanded')
print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first 10 rows from column 14 onwards, giving non-missing 'chord' cells a background colour.
all_labels.iloc[:10, 14:].style.apply(color_background, subset="chord")
95751 hand-annotated harmony labels:
| chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | |||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | [0.0, 6.0) | I | I | nan | nan | nan | nan | nan | { | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |
| [6.0, 8.0) | V | V | nan | nan | nan | nan | nan | nan | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | |||
| [8.0, 9.0) | V7 | V | nan | 7 | nan | nan | nan | nan | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | nan | nan | nan | |||
| [9.0, 12.0) | I | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| [12.0, 14.0) | V | V | nan | nan | nan | nan | nan | nan | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | |||
| [14.0, 15.0) | V7 | V | nan | 7 | nan | nan | nan | nan | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | nan | nan | nan | |||
| [15.0, 20.0) | I | I | nan | nan | nan | nan | nan | } | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| [18.25, 18.25) | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | |||||
| [20.0, 21.0) | ii/V | ii | nan | nan | nan | V | nan | nan | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | |||
| [21.0, 23.0) | V6/V | V | nan | 6 | nan | V | nan | nan | M | False | False | (6, 3, 2) | () | 2 | 6 | nan | nan | nan |
# Render a "Show Code"/"Hide Code" button. The embedded script (which relies on jQuery's `$`)
# toggles the visibility of all `div.input` elements and hides them once the document is ready.
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Metadata¶
# Fetch the metadata table of the underlying data object.
dataset_metadata = hascadence.data.metadata()
# Select the rows listed under the empty-tuple key of `indices`
# (presumably the ungrouped piece indices that passed the filter; verify against dimcat).
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# Name the first index level 'dataset'; the bar chart of pieces further below groups by this level name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
| TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | n_onsets | n_onset_positions | guitar_chord_count | form_label_count | label_count | harmony_version | annotated_key | annotators | composed_start | composed_end | composer | workTitle | movementNumber | movementTitle | workNumber | poet | lyricist | arranger | copyright | creationDate | mscVersion | platform | source | translator | musescore | ms3_version | title_text | lyricist_text | has_drumset | ambitus | subdirectory | rel_path | originalFormat | staff_1_ambitus | staff_1_instrument | reviewers | subtitle_text | composer_text | composed_source | imslp | musicbrainz | viaf | wikidata | staff_2_ambitus | staff_2_instrument | score_integrity | staff_3_ambitus | staff_3_instrument | imslp.1 | key | mode | typesetter | comments | electronic editor | electronic encoder | staff_4_ambitus | staff_4_instrument | text | score integrity | extension | Deutsch | dance | goldenberg_id | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| dataset | fname | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | 1: 3/4 | 1: 0 | 88 | 88 | 264.0 | 88.0 | 88.0 | 264.0 | NaN | 292.00 | 1007 | 970 | 0 | 0 | 113 | 2.3.0 | C | Adrian Nagel | 1717 | 1723 | Bach, J.S. | Cello Suite No.3 in C major | 1 | NaN | NaN | NaN | NaN | NaN | NaN | 2021-02-21 | NaN | Apple Macintosh | NaN | NaN | 3.6.2 | 1.2.2 | NaN | NaN | False | 36-67 (C2-G4) | MS3 | MS3/BWV1009_01_Prelude.mscx | NaN | 36-67 (C2-G4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| BWV1009_02_Allemande | 1: 4/4 | 1: 0 | 25 | 24 | 96.0 | 50.0 | 48.0 | 192.0 | NaN | 107.25 | 486 | 469 | 0 | 0 | 106 | 2.3.0 | C | Adrian Nagel | 1717 | 1723 | Bach, J.S. | Cello Suite No.3 in C major | 2 | NaN | NaN | NaN | NaN | NaN | NaN | 2021-02-21 | NaN | Apple Macintosh | NaN | NaN | 3.6.0 | 1.2.2 | NaN | NaN | False | 36-67 (C2-G4) | MS3 | MS3/BWV1009_02_Allemande.mscx | NaN | 36-67 (C2-G4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| BWV1009_03_Courante | 1: 6/8 | 1: 0 | 86 | 84 | 252.0 | 172.0 | 168.0 | 504.0 | NaN | 257.00 | 495 | 492 | 0 | 0 | 83 | 2.3.0 | C | Adrian Nagel | 1717 | 1723 | Bach, J.S. | Cello Suite No.3 in C major | 3 | NaN | NaN | NaN | NaN | NaN | NaN | 2021-02-21 | NaN | Apple Macintosh | NaN | NaN | 3.6.0 | 1.2.2 | NaN | NaN | False | 36-64 (C2-E4) | MS3 | MS3/BWV1009_03_Courante.mscx | NaN | 36-64 (C2-E4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| BWV1009_04_Sarabande | 1: 3/4 | 1: 0 | 24 | 24 | 72.0 | 48.0 | 48.0 | 144.0 | NaN | 122.00 | 217 | 171 | 0 | 0 | 55 | 2.3.0 | C | Adrian Nagel | 1717 | 1723 | Bach, J.S. | Cello Suite No.3 in C major | 4 | NaN | NaN | NaN | NaN | NaN | NaN | 2021-03-08 | NaN | Apple Macintosh | NaN | NaN | 3.6.0 | 1.2.2 | NaN | NaN | False | 36-67 (C2-G4) | MS3 | MS3/BWV1009_04_Sarabande.mscx | NaN | 36-67 (C2-G4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| BWV1009_05_BourréeI | 1: 2/2 | 1: 0 | 29 | 28 | 112.0 | 58.0 | 56.0 | 224.0 | NaN | 118.50 | 191 | 186 | 0 | 0 | 64 | 2.3.0 | C | Adrian Nagel | 1717 | 1723 | Bach, J.S. | Cello Suite No.3 in C major | 5 | NaN | NaN | NaN | NaN | NaN | NaN | 2021-02-21 | NaN | Apple Macintosh | NaN | NaN | 3.6.2 | 1.2.2 | NaN | NaN | False | 36-65 (C2-F4) | MS3 | MS3/BWV1009_05_BourréeI.mscx | NaN | 36-65 (C2-F4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# For each value of index level 0, count the pieces per `composed_end` value.
hascadence_metadata.groupby(level=0).composed_end.value_counts()
dataset composed_end
bach_solo 1723 32
beethoven_piano_sonatas 1795 12
1798 10
1802 10
1797 8
1822 5
1799 3
1804 3
1805 3
1810 3
1820 3
1796 2
1809 2
c_schumann_lieder 1844 6
1853 6
chopin_mazurkas 1832 9
1833 8
1838 5
1846 4
1837 3
1842 3
1844 3
1845 3
1826 2
1830 2
1835 2
1827 1
1834 1
1839 1
1840 1
1841 1
1849 1
corelli 1689 50
1694 50
1681 48
debussy_suite_bergamasque 1905 4
dvorak_silhouettes 1879 12
grieg_lyrical_pieces 1867 8
1883 7
1901 7
1886 6
1888 6
1891 6
1893 6
1895 6
1896 6
1899 6
1878 1
handel_keyboard 1720 6
jc_bach_sonatas 1765 16
1780 13
liszt_pelerinage 1855 9
1858 7
1861 3
mahler_kindertotenlieder 1904 5
medtner_tales 1917 8
1912 4
1924 2
1905 1
1907 1
pleyel_quartets 1783 6
scarlatti_sonatas 1739 32
1742 30
1746 4
1749 3
schubert_dances 1820 14
1823 1
1825 1
schumann_kinderszenen 1839 13
tchaikovsky_seasons 1876 12
wf_bach_sonatas 1760 9
Name: composed_end, dtype: int64
# Mean `composed_end` per dataset, truncated to an integer and sorted in ascending order.
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
# Dataset names in ascending order of that mean year.
chronological_order = mean_composition_years.index.to_list()
# One row per dataset with its mean year and its number of pieces.
bar_data = pd.concat([mean_composition_years.rename('year'),
hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
axis=1
).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
# Use a fixed bar width of 5 units on the x axis.
fig.update_traces(width=5)
Keys¶
Computing extent of key segments from annotations¶
In the following, major and minor keys are distinguished by the boolean `localkey_is_minor` (`False` for major, `True` for minor).
# Run LocalKeySlicer and then ModeGrouper on the filtered pieces.
segmented_by_keys = dc.Pipeline([
dc.LocalKeySlicer(),
dc.ModeGrouper()])\
.process_data(hascadence)
key_segments = segmented_by_keys.get_slice_info()
# The dtype printed here is `object` (see the cell output), so the column is converted to a
# numeric dtype before it is aggregated in the following cells.
print(key_segments.duration_qb.dtype)
key_segments.duration_qb = pd.to_numeric(key_segments.duration_qb)
object
# Preview the first 15 key segments from column 11 onwards. Missing values are replaced by '' first,
# so no 'localkey' cell counts as missing and all of them receive the background colour.
key_segments.iloc[:15, 11:].fillna('').style.apply(color_background, subset="localkey")
| globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | special | volta | pedalend | placement | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | corpus | fname | localkey_slice | ||||||||||||||||||||||
| False | bach_solo | BWV1009_01_Prelude | [0.0, 264.0) | C | I | I | I | { | M | False | False | (0, 4, 1) | () | 0 | 0 | ||||||||||
| BWV1009_02_Allemande | [0.0, 16.75) | C | I | V | V | { | M | False | False | (1, 5, 2) | () | 1 | 1 | ||||||||||||
| [16.75, 52.75) | C | V | V6 | V | 6 | M | False | False | (5, 2, 1) | () | 1 | 5 | |||||||||||||
| [68.75, 96.0) | C | I | V7/IV | V | 7 | IV | Mm7 | False | False | (0, 4, 1, -2) | () | 0 | 0 | ||||||||||||
| BWV1009_03_Courante | [0.0, 24.5) | C | I | I | I | { | M | False | False | (0, 4, 1) | () | 0 | 0 | ||||||||||||
| [24.5, 123.5) | C | V | I6 | I | 6 | M | False | False | (4, 1, 0) | () | 0 | 4 | |||||||||||||
| [168.5, 252.0) | C | I | V7/IV | V | 7 | IV | Mm7 | False | False | (0, 4, 1, -2) | () | 0 | 0 | ||||||||||||
| BWV1009_04_Sarabande | [0.0, 13.0) | C | I | I | I | { | M | False | False | (0, 4, 1) | () | 0 | 0 | ||||||||||||
| [13.0, 27.0) | C | V | ii7 | ii | 7 | mm7 | False | False | (2, -1, 3, 0) | () | 2 | 2 | |||||||||||||
| [48.0, 60.0) | C | V | viio64 | vii | o | 64 | { | o | False | False | (-1, 5, 2) | () | 5 | -1 | |||||||||||
| [60.0, 72.0) | C | I | viio64/IV | vii | o | 64 | IV | o | False | False | (-2, 4, 1) | () | 4 | -2 | |||||||||||
| BWV1009_05_BourréeI | [0.0, 17.0) | C | I | I6 | I | 6 | { | M | False | False | (4, 1, 0) | () | 0 | 4 | |||||||||||
| [17.0, 40.0) | C | V | ii | ii | m | False | False | (2, -1, 3) | () | 2 | 2 | ||||||||||||||
| [66.0, 112.0) | C | I | V/V | V | V | M | False | False | (2, 6, 3) | () | 2 | 2 | |||||||||||||
| BWV1009_06_BourréeII | [17.0, 41.0) | c | III | I | I | { | M | True | False | (0, 4, 1) | () | 0 | 0 |
Ratio between major and minor key segments by aggregated durations¶
Overall¶
# Total `duration_qb` of all key segments per value of `localkey_is_minor`.
maj_min_ratio = key_segments.groupby(level="localkey_is_minor").duration_qb.sum().to_frame()
# Each total as a percentage of the overall duration, rounded to one decimal.
maj_min_ratio['fraction'] = (100.0 * maj_min_ratio.duration_qb / maj_min_ratio.duration_qb.sum()).round(1)
maj_min_ratio
| duration_qb | fraction | |
|---|---|---|
| localkey_is_minor | ||
| False | 87978.0 | 61.3 |
| True | 55521.0 | 38.7 |
By dataset¶
# Total `duration_qb` per corpus and mode, rounded to two decimals.
segment_duration_per_dataset = key_segments.groupby(level=["corpus", "localkey_is_minor"]).duration_qb.sum().round(2)
# The same totals as a percentage of each corpus's overall duration.
norm_segment_duration_per_dataset = 100 * segment_duration_per_dataset / segment_duration_per_dataset.groupby(level="corpus").sum()
# Combine the durations with the percentages, the latter formatted as strings such as "59.5 %".
maj_min_ratio_per_dataset = pd.concat([segment_duration_per_dataset,
norm_segment_duration_per_dataset.rename('fraction').round(1).astype(str)+" %"],
axis=1)
# NOTE(review): this rebinds the name to an unrounded table with a flat index;
# no use of the rebound value is visible in this part of the notebook.
segment_duration_per_dataset = key_segments.groupby(level=["corpus", "localkey_is_minor"]).duration_qb.sum().reset_index()
maj_min_ratio_per_dataset.reset_index()
| corpus | localkey_is_minor | duration_qb | fraction | |
|---|---|---|---|---|
| 0 | bach_solo | False | 3340.00 | 59.5 % |
| 1 | bach_solo | True | 2277.25 | 40.5 % |
| 2 | beethoven_piano_sonatas | False | 23659.75 | 66.3 % |
| 3 | beethoven_piano_sonatas | True | 12003.38 | 33.7 % |
| 4 | c_schumann_lieder | False | 1296.00 | 88.9 % |
| 5 | c_schumann_lieder | True | 162.50 | 11.1 % |
| 6 | chopin_mazurkas | False | 7845.50 | 57.2 % |
| 7 | chopin_mazurkas | True | 5881.75 | 42.8 % |
| 8 | corelli | False | 9607.00 | 53.4 % |
| 9 | corelli | True | 8387.00 | 46.6 % |
| 10 | debussy_suite_bergamasque | False | 584.00 | 36.1 % |
| 11 | debussy_suite_bergamasque | True | 1032.00 | 63.9 % |
| 12 | dvorak_silhouettes | False | 1239.50 | 66.9 % |
| 13 | dvorak_silhouettes | True | 613.00 | 33.1 % |
| 14 | grieg_lyrical_pieces | False | 9900.17 | 60.4 % |
| 15 | grieg_lyrical_pieces | True | 6504.33 | 39.6 % |
| 16 | handel_keyboard | False | 218.00 | 100.0 % |
| 17 | jc_bach_sonatas | False | 6653.00 | 80.0 % |
| 18 | jc_bach_sonatas | True | 1659.50 | 20.0 % |
| 19 | liszt_pelerinage | False | 6833.42 | 70.4 % |
| 20 | liszt_pelerinage | True | 2868.46 | 29.6 % |
| 21 | mahler_kindertotenlieder | False | 665.00 | 36.3 % |
| 22 | mahler_kindertotenlieder | True | 1165.00 | 63.7 % |
| 23 | medtner_tales | False | 1598.67 | 34.7 % |
| 24 | medtner_tales | True | 3011.33 | 65.3 % |
| 25 | pleyel_quartets | False | 1713.00 | 56.5 % |
| 26 | pleyel_quartets | True | 1321.50 | 43.5 % |
| 27 | scarlatti_sonatas | False | 7280.38 | 53.4 % |
| 28 | scarlatti_sonatas | True | 6352.50 | 46.6 % |
| 29 | schubert_dances | False | 1038.00 | 96.2 % |
| 30 | schubert_dances | True | 41.00 | 3.8 % |
| 31 | schumann_kinderszenen | False | 700.00 | 74.9 % |
| 32 | schumann_kinderszenen | True | 234.00 | 25.1 % |
| 33 | tchaikovsky_seasons | False | 2387.00 | 60.9 % |
| 34 | tchaikovsky_seasons | True | 1532.00 | 39.1 % |
| 35 | wf_bach_sonatas | False | 1419.62 | 74.9 % |
| 36 | wf_bach_sonatas | True | 474.50 | 25.1 % |
# Display the dataset names sorted by their mean `composed_end` year (computed in the Metadata section).
chronological_order
['corelli',
'handel_keyboard',
'bach_solo',
'scarlatti_sonatas',
'wf_bach_sonatas',
'jc_bach_sonatas',
'pleyel_quartets',
'beethoven_piano_sonatas',
'schubert_dances',
'chopin_mazurkas',
'schumann_kinderszenen',
'c_schumann_lieder',
'liszt_pelerinage',
'tchaikovsky_seasons',
'dvorak_silhouettes',
'grieg_lyrical_pieces',
'mahler_kindertotenlieder',
'debussy_suite_bergamasque',
'medtner_tales']
# Stacked bar chart of the aggregated major vs. minor key-segment durations, one bar per corpus,
# with each segment labelled by its percentage string from the 'fraction' column.
# After reset_index() the corpus names are in the "corpus" column, so the axis-label override and
# the chronological ordering have to be keyed by "corpus"; entries keyed by a name that is not a
# plotted column are not applied.
fig = px.bar(
    maj_min_ratio_per_dataset.reset_index(),
    x="corpus",
    y="duration_qb",
    color="localkey_is_minor",
    text='fraction',
    labels=dict(corpus='', duration_qb="aggregated duration in quarter notes"),
    category_orders=dict(corpus=chronological_order),
)
# Apply the notebook-wide layout settings.
fig.update_layout(**STD_LAYOUT)
Annotation table sliced by key segments¶
# Retrieve the 'expanded' facet from the data that was sliced by local key and grouped by mode.
annotations_by_keys = segmented_by_keys.get_facet("expanded")
annotations_by_keys
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | special | volta | pedalend | placement | |||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | corpus | fname | localkey_slice | interval | |||||||||||||||||||||||||||||||||
| False | bach_solo | BWV1009_01_Prelude | [0.0, 264.0) | [0.0, 6.0) | 1 | 1 | 0 | 6.0 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | NaN | C | I | NaN | I | I | NaN | NaN | NaN | NaN | NaN | { | M | False | False | (0, 4, 1) | () | 0 | 0 | NaN | <NA> | NaN | NaN |
| [6.0, 8.0) | 3 | 3 | 6 | 2.0 | 0 | 0 | 3/4 | 1 | 1 | V | NaN | C | I | NaN | V | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (1, 5, 2) | () | 1 | 1 | NaN | <NA> | NaN | NaN | ||||
| [8.0, 9.0) | 3 | 3 | 8 | 1.0 | 1/2 | 1/2 | 3/4 | 1 | 1 | V7 | NaN | C | I | NaN | V7 | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | <NA> | NaN | NaN | ||||
| [9.0, 12.0) | 4 | 4 | 9 | 3.0 | 0 | 0 | 3/4 | 1 | 1 | I | NaN | C | I | NaN | I | I | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (0, 4, 1) | () | 0 | 0 | NaN | <NA> | NaN | NaN | ||||
| [12.0, 14.0) | 5 | 5 | 12 | 2.0 | 0 | 0 | 3/4 | 1 | 1 | V | NaN | C | I | NaN | V | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (1, 5, 2) | () | 1 | 1 | NaN | <NA> | NaN | NaN | ||||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| True | wf_bach_sonatas | F003_n04c | [266.0, 335.0) | [327.0, 327.0) | 84 | 82 | 327 | 0.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | { | NaN | D | vi | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | False | True | () | () | <NA> | <NA> | NaN | <NA> | NaN | NaN |
| [330.0, 331.0) | 85 | 83 | 330 | 1.0 | 1/4 | 1/4 | 2/2 | 2 | 1 | i6 | NaN | D | vi | NaN | i6 | i | NaN | 6 | NaN | NaN | NaN | NaN | m | False | True | (-3, 1, 0) | () | 0 | -3 | NaN | <NA> | NaN | NaN | ||||
| [331.0, 332.0) | 85 | 83 | 331 | 1.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | iv | NaN | D | vi | NaN | iv | iv | NaN | NaN | NaN | NaN | NaN | NaN | m | False | True | (-1, -4, 0) | () | -1 | -1 | NaN | <NA> | NaN | NaN | ||||
| [332.0, 333.0) | 85 | 83 | 332 | 1.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | V | NaN | D | vi | NaN | V | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | True | (1, 5, 2) | () | 1 | 1 | NaN | <NA> | NaN | NaN | ||||
| [333.0, 335.0) | 86 | 84 | 333 | 2.0 | 0 | 0 | 2/2 | 2 | 1 | i|IAC}{ | NaN | D | vi | NaN | i | i | NaN | NaN | NaN | NaN | IAC | }{ | m | False | True | (0, -3, 1) | () | 0 | 0 | NaN | <NA> | NaN | NaN |
95750 rows × 33 columns
# Render a "Show Code"/"Hide Code" button. The embedded script (which relies on jQuery's `$`)
# toggles the visibility of all `div.input` elements and hides them once the document is ready.
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Phrases¶
Overview¶
Presence of phrase annotation symbols per dataset:¶
# Count how often each `phraseend` symbol occurs within every corpus.
all_labels.groupby(["corpus"]).phraseend.value_counts()
corpus phraseend
bach_solo } 172
{ 170
}{ 24
beethoven_piano_sonatas } 925
{ 918
}{ 424
c_schumann_lieder { 55
} 55
}{ 34
chopin_mazurkas } 505
{ 498
}{ 49
corelli } 705
{ 702
}{ 379
debussy_suite_bergamasque { 15
} 15
}{ 10
dvorak_silhouettes { 93
} 92
}{ 77
grieg_lyrical_pieces } 518
{ 514
}{ 33
handel_keyboard { 25
} 25
}{ 1
jc_bach_sonatas } 297
{ 293
}{ 144
\\ 5
liszt_pelerinage } 208
{ 205
}{ 68
mahler_kindertotenlieder } 19
{ 18
}{ 11
medtner_tales { 150
} 150
}{ 56
pleyel_quartets } 86
{ 85
}{ 40
scarlatti_sonatas } 566
{ 563
}{ 426
schubert_dances { 71
} 71
schumann_kinderszenen } 83
{ 79
}{ 2
tchaikovsky_seasons { 288
} 288
}{ 10
wf_bach_sonatas } 88
{ 87
}{ 71
\\ 3
Name: phraseend, dtype: int64
Presence of legacy phrase endings¶
# Select the labels whose `phraseend` is the two-backslash marker (the legacy phrase ending)
# and give non-missing 'label' cells a background colour.
all_labels[all_labels.phraseend == r'\\'].style.apply(color_background, subset="label")
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | |||||||||||||||||||||||||||||||||
| jc_bach_sonatas | wa01op05no1a_Allegretto | [124.0, 124.0) | 64 | 62 | 124 | 0.000000 | 0 | 0 | 2/4 | 2 | 1 | \\ | nan | Bb | I | nan | nan | nan | nan | nan | nan | nan | nan | \\ | nan | False | False | () | () | nan | nan | nan | |||
| wa04op05no4a_Allegro | [168.0, 172.0) | 43 | 43 | 168 | 4.000000 | 0 | 0 | 4/4 | 2 | 1 | I]\\ | nan | Eb | V | I | I | I | nan | nan | nan | nan | nan | \\ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | ||
| [464.0, 468.0) | 117 | 117 | 464 | 4.000000 | 0 | 0 | 4/4 | 2 | 1 | I]\\ | nan | Eb | I | I | I | I | nan | nan | nan | nan | nan | \\ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| wa05op05no5b_Adagio | [10.0, 11.0) | 4 | 4 | 10 | 1.000000 | 1/4 | 1/4 | 3/4 | 2 | 1 | I\\ | nan | A | I | nan | I | I | nan | nan | nan | nan | nan | \\ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | ||
| [118.0, 119.0) | 40 | 40 | 118 | 1.000000 | 1/4 | 1/4 | 3/4 | 2 | 1 | I\\ | nan | A | I | nan | I | I | nan | nan | nan | nan | nan | \\ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| wf_bach_sonatas | F003_n04a | [27.5, 28.5) | 15 | 14 | 55/2 | 1.000000 | 1/4 | 1/4 | 2/4 | 2 | 1 | V6\\ | nan | D | V | nan | V6 | V | nan | 6 | nan | nan | nan | \\ | M | False | False | (5, 2, 1) | () | 1 | 5 | nan | nan | nan | |
| [113.5, 114.0) | 59 | 57 | 227/2 | 0.500000 | 1/4 | 1/4 | 2/4 | 2 | 1 | vi\\ | nan | D | V | nan | vi | vi | nan | nan | nan | nan | nan | \\ | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | |||
| [151.0, 151.5) | 78 | 76 | 151 | 0.500000 | 1/8 | 1/8 | 2/4 | 2 | 1 | vi\\ | nan | D | I | nan | vi | vi | nan | nan | nan | nan | nan | \\ | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan |
A table with the extents of all annotated phrases¶
Relevant columns:
quarterbeats: start position of each phrase
duration_qb: duration of each phrase, measured in quarter notes
phrase_slice: time interval of each annotated phrase (for segmenting chord progressions and notes)
# Disabled alternative that would slice `hascadence` instead:
# segmented = PhraseSlicer().process_data(hascadence)
# Apply dimcat's PhraseSlicer to `grouped_by_dataset`.
segmented = dc.PhraseSlicer().process_data(grouped_by_dataset)
# Table of slices; its number of rows is reported below as the number of phrases.
phrases = segmented.get_slice_info()
print(f"Overall number of phrases is {len(phrases.index)}")
# Preview the first ten phrases, styling the columns holding onset and duration.
phrases.head(10).style.apply(color_background, subset=["quarterbeats", "duration_qb"])
Overall number of phrases is 6688
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | |||||||||||||||||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | [0.0, 18.25) | 1 | 1 | 0 | 18.250000 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | { | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan |
| [18.25, 36.25) | 7 | 7 | 73/4 | 18.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [36.25, 78.0) | 13 | 13 | 145/4 | 41.750000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [78.0, 108.0) | 27 | 27 | 78 | 30.000000 | 0 | 0 | 3/4 | 1 | 1 | vi}{ | nan | C | I | nan | vi | vi | nan | nan | nan | nan | nan | }{ | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | nan | ||
| [108.0, 180.25) | 37 | 37 | 108 | 72.250000 | 0 | 0 | 3/4 | 1 | 1 | I}{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | }{ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | ||
| [180.25, 210.25) | 61 | 61 | 721/4 | 30.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [210.25, 244.25) | 71 | 71 | 841/4 | 34.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [244.25, 264.0) | 82 | 82 | 977/4 | 19.750000 | 5/16 | 5/16 | 3/4 | 1 | 1 | { | nan | C | I | I | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| BWV1009_02_Allemande | [0.0, 15.75) | 1 | 0 | 0 | 15.750000 | 0 | 13/16 | 4/4 | 1 | 1 | C.V{ | nan | C | I | nan | V | V | nan | nan | nan | nan | nan | { | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | nan | |
| [15.75, 48.0) | 5 | 4 | 63/4 | 32.250000 | 3/4 | 3/4 | 4/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan |
# Show the dtype of the phrase durations as delivered by the slicer, then
# convert the column to a numeric dtype for the statistics below.
print(phrases["duration_qb"].dtype)
phrases["duration_qb"] = pd.to_numeric(phrases["duration_qb"])
object
Annotation table sliced by phrase annotations¶
ToDo: Example for overlap / phrase beginning without new chord
# Retrieve the "expanded" facet from the phrase-sliced data and preview its first ten rows.
phrase_segments = segmented.get_facet("expanded")
phrase_segments.head(10)
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | interval | |||||||||||||||||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | [0.0, 18.25) | [0.0, 0.0) | 1 | 1 | 0.0 | 0.00 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | NaN | C | I | NaN | <NA> | <NA> | <NA> | <NA> | <NA> | NaN | NaN | { | <NA> | False | False | <NA> | <NA> | <NA> | <NA> | <NA> | NaN | NaN | NaN |
| [0.0, 6.0) | 1 | 1 | 0.0 | 6.00 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | NaN | C | I | NaN | I | I | NaN | NaN | NaN | NaN | <NA> | <NA> | M | False | False | (0, 4, 1) | () | 0 | 0 | <NA> | NaN | NaN | NaN | |||
| [6.0, 8.0) | 3 | 3 | 6.0 | 2.00 | 0 | 0 | 3/4 | 1 | 1 | V | NaN | C | I | NaN | V | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (1, 5, 2) | () | 1 | 1 | <NA> | NaN | NaN | NaN | |||
| [8.0, 9.0) | 3 | 3 | 8.0 | 1.00 | 1/2 | 1/2 | 3/4 | 1 | 1 | V7 | NaN | C | I | NaN | V7 | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | <NA> | NaN | NaN | NaN | |||
| [9.0, 12.0) | 4 | 4 | 9.0 | 3.00 | 0 | 0 | 3/4 | 1 | 1 | I | NaN | C | I | NaN | I | I | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (0, 4, 1) | () | 0 | 0 | <NA> | NaN | NaN | NaN | |||
| [12.0, 14.0) | 5 | 5 | 12.0 | 2.00 | 0 | 0 | 3/4 | 1 | 1 | V | NaN | C | I | NaN | V | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | False | (1, 5, 2) | () | 1 | 1 | <NA> | NaN | NaN | NaN | |||
| [14.0, 15.0) | 5 | 5 | 14.0 | 1.00 | 1/2 | 1/2 | 3/4 | 1 | 1 | V7 | NaN | C | I | NaN | V7 | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | <NA> | NaN | NaN | NaN | |||
| [15.0, 15.0) | 6 | 6 | 15.0 | 0.00 | 0 | 0 | 3/4 | 1 | 1 | I} | NaN | C | I | NaN | <NA> | <NA> | <NA> | <NA> | <NA> | NaN | NaN | } | <NA> | False | False | <NA> | <NA> | <NA> | <NA> | <NA> | NaN | NaN | NaN | |||
| [15.0, 18.25) | 6 | 6 | 15.0 | 3.25 | 0 | 0 | 3/4 | 1 | 1 | I} | NaN | C | I | NaN | I | I | NaN | NaN | NaN | NaN | <NA> | <NA> | M | False | False | (0, 4, 1) | () | 0 | 0 | <NA> | NaN | NaN | NaN | |||
| [18.25, 36.25) | [18.25, 18.25) | 7 | 7 | 18.25 | 0.00 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | NaN | C | I | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | False | False | () | () | <NA> | <NA> | <NA> | NaN | NaN | NaN |
# Show the dtype of the segment durations, then make sure the column is numeric.
print(phrase_segments["duration_qb"].dtype)
phrase_segments["duration_qb"] = pd.to_numeric(phrase_segments["duration_qb"])
float64
Distribution of phrase lengths¶
Histogram summarizing the lengths of all phrases measured in quarter notes¶
# Number of phrases for every distinct phrase duration (in quarter notes).
phrase_durations = phrases["duration_qb"].value_counts()
histogram = px.histogram(
    x=phrase_durations.index,
    y=phrase_durations,
    labels={
        "x": 'phrase lengths binned to a quarter note',
        "y": '#phrases within length bin',
    },
)
# Bins are one quarter note wide and binning ends at 100 quarter notes.
histogram.update_traces(
    xbins={
        # "start": 0.0,
        "end": 100.0,
        "size": 1,
    }
)
histogram.update_xaxes(dtick=4)
histogram.show()
Bar plot showing approximative phrase length in measures¶
Computed simply by subtracting, for every phrase, the first measure number in its span from the last.
# Group the segments by the first three index levels, i.e. one group per phrase.
phrase_gpb = phrase_segments.groupby(level=[0, 1, 2])
# Approximate length: highest minus lowest measure number within each phrase.
phrase_length_in_measures = phrase_gpb["mn"].max() - phrase_gpb["mn"].min()
measure_length_counts = phrase_length_in_measures.value_counts()
fig = px.bar(
    x=measure_length_counts.index,
    y=measure_length_counts,
    labels={
        "x": "approximative size of all phrases (difference between end and start measure number)",
        "y": "#phrases",
    },
)
fig.update_xaxes(dtick=4)
Histogram summarizing phrase lengths by precise length expressed in measures¶
In order to divide the phrase length by the length of a measure, the phrases containing more than one time signature are filtered out.
Phrase lengths in measures are computed by dividing each phrase's duration (in quarter notes) by the length of one measure in its time signature.
# Distinct time signatures occurring within each phrase.
phrase2timesigs = phrase_gpb["timesig"].unique()
n_timesignatures_per_phrase = phrase2timesigs.map(len)
# Keep only phrases with a single time signature and unpack that signature.
has_single_timesig = n_timesignatures_per_phrase == 1
uniform_timesigs = phrase2timesigs[has_single_timesig].map(lambda timesigs: timesigs[0])
more_than_one = n_timesignatures_per_phrase > 1
print(f"Filtered out the {more_than_one.sum()} phrases incorporating more than one time signature.")
n_timesigs = n_timesignatures_per_phrase.value_counts()
display(
    n_timesigs.reset_index().rename(
        columns={"index": '#time signatures', "timesig": '#phrases'}
    )
)
uniform_timesig_phrases = phrases.loc[uniform_timesigs.index]
# Measure length in quarter notes: the time signature as a fraction, times 4.
timesig_in_quarterbeats = uniform_timesigs.map(Fraction) * 4
exact_measure_lengths = uniform_timesig_phrases["duration_qb"] / timesig_in_quarterbeats
# NOTE: from here on `uniform_timesigs` is rebound to the phrase table,
# extended by the new 'duration_measures' column.
uniform_timesigs = pd.concat(
    [exact_measure_lengths.rename('duration_measures'), uniform_timesig_phrases],
    axis=1,
)
uniform_timesigs.to_csv('cadence_datasets_uniform_timesigs.tsv.zip', sep='\t')
fig = px.histogram(
    uniform_timesigs,
    x='duration_measures',
    labels={"duration_measures": 'phrase length in measures, factoring in time signatures'},
)
# Bins are one measure wide.
fig.update_traces(
    xbins={
        # "start": 0.0,
        # "end": 100.0,
        "size": 1,
    }
)
fig.update_xaxes(dtick=4)
Filtered out the 69 phrases incorporating more than one time signature.
| #time signatures | #phrases | |
|---|---|---|
| 0 | 1 | 6619 |
| 1 | 2 | 68 |
| 2 | 3 | 1 |
# Preview the first ten phrases, styling the newly added 'duration_measures' column.
uniform_timesigs.head(10).style.apply(color_background, subset='duration_measures')
| duration_measures | mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | ||||||||||||||||||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | [0.0, 18.25) | 6.083333 | 1 | 1 | 0 | 18.250000 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | { | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan |
| [18.25, 36.25) | 6.000000 | 7 | 7 | 73/4 | 18.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [36.25, 78.0) | 13.916667 | 13 | 13 | 145/4 | 41.750000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [78.0, 108.0) | 10.000000 | 27 | 27 | 78 | 30.000000 | 0 | 0 | 3/4 | 1 | 1 | vi}{ | nan | C | I | nan | vi | vi | nan | nan | nan | nan | nan | }{ | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | nan | ||
| [108.0, 180.25) | 24.083333 | 37 | 37 | 108 | 72.250000 | 0 | 0 | 3/4 | 1 | 1 | I}{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | }{ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | ||
| [180.25, 210.25) | 10.000000 | 61 | 61 | 721/4 | 30.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [210.25, 244.25) | 11.333333 | 71 | 71 | 841/4 | 34.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [244.25, 264.0) | 6.583333 | 82 | 82 | 977/4 | 19.750000 | 5/16 | 5/16 | 3/4 | 1 | 1 | { | nan | C | I | I | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| BWV1009_02_Allemande | [0.0, 15.75) | 3.937500 | 1 | 0 | 0 | 15.750000 | 0 | 13/16 | 4/4 | 1 | 1 | C.V{ | nan | C | I | nan | V | V | nan | nan | nan | nan | nan | { | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | nan | |
| [15.75, 48.0) | 8.062500 | 5 | 4 | 63/4 | 32.250000 | 3/4 | 3/4 | 4/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan |
Inspecting long phrases¶
# Number of single-meter phrases per time signature.
timsig_counts = uniform_timesigs["timesig"].value_counts()
fig = px.bar(
    timsig_counts,
    labels={"index": "time signature", "value": "#phrases"},
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
# Leave out time signatures that occur in fewer phrases than this threshold.
filter_counts_smaller_than = 5
is_rare_timesig = timsig_counts < filter_counts_smaller_than
filtered_timesigs = timsig_counts[is_rare_timesig].index.to_list()
keep_phrase = ~uniform_timesigs["timesig"].isin(filtered_timesigs)
# One histogram facet per remaining time signature, each with its own axes.
fig = px.histogram(
    uniform_timesigs[keep_phrase],
    x='duration_measures',
    facet_col='timesig',
    facet_col_wrap=2,
    height=1500,
)
fig.update_xaxes(matches=None, showticklabels=True, visible=True, dtick=4)
fig.update_yaxes(matches=None, showticklabels=True, visible=True)
# Bins are one measure wide and binning ends at 50 measures.
fig.update_traces(
    xbins={
        # "start": 0.0,
        "end": 50.0,
        "size": 1,
    }
)
# Display, per time signature, all phrases spanning at least this many measures.
see_greater_equal = 33
is_long_phrase = uniform_timesigs["duration_measures"] >= see_greater_equal
longest_measure_length = uniform_timesigs.loc[is_long_phrase, ["duration_measures", "timesig"]]
for timesig, long_phrases in longest_measure_length.groupby('timesig'):
    L = len(long_phrases)
    plural = '' if L <= 1 else 's'
    display(HTML(f"<h3>{L} long phrase{plural} in {timesig} meter:</h3>"))
    display(long_phrases.sort_values('duration_measures'))
1 long phrase in 12/8 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| bach_solo | BWV1012_01_Prelude | [306.0, 594.0) | 48.0 | 12/8 |
10 long phrases in 2/2 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| grieg_lyrical_pieces | op57n02 | [64.0, 196.0) | 33.0 | 2/2 |
| [260.0, 392.0) | 33.0 | 2/2 | ||
| beethoven_piano_sonatas | 26-1 | [800.0, 936.0) | 34.0 | 2/2 |
| pleyel_quartets | b309op2n3b | [500.0, 640.0) | 35.0 | 2/2 |
| beethoven_piano_sonatas | 09-3 | [186.0, 332.0) | 36.5 | 2/2 |
| 08-1 | [200.0, 352.0) | 38.0 | 2/2 | |
| 21-3 | [804.0, 956.0) | 38.0 | 2/2 | |
| 26-1 | [244.0, 404.0) | 40.0 | 2/2 | |
| 21-3 | [956.0, 1132.0) | 44.0 | 2/2 | |
| 08-1 | [576.0, 776.0) | 50.0 | 2/2 |
13 long phrases in 2/4 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| liszt_pelerinage | 160.09_Les_Cloches_de_Geneve_(Nocturne) | [202.5, 270.0) | 33.75 | 2/4 |
| grieg_lyrical_pieces | op68n03 | [0.0, 68.0) | 34.0 | 2/4 |
| op38n01 | [100.0, 172.0) | 36.0 | 2/4 | |
| op68n04 | [16.0, 92.0) | 38.0 | 2/4 | |
| [92.0, 168.0) | 38.0 | 2/4 | ||
| beethoven_piano_sonatas | 06-1 | [152.5, 234.0) | 40.75 | 2/4 |
| grieg_lyrical_pieces | op54n06 | [36.0, 120.0) | 42.0 | 2/4 |
| liszt_pelerinage | 162.03_Tarantella_da_Guillaume_Louis_Cottrau._Presto_e_canzone_napolitana | [1025.5416666666667, 1109.875) | 42.166667 | 2/4 |
| [885.875, 970.875) | 42.5 | 2/4 | ||
| beethoven_piano_sonatas | 02-1 | [360.0, 448.0) | 44.0 | 2/4 |
| 21-3 | [476.0, 572.0) | 48.0 | 2/4 | |
| liszt_pelerinage | 160.09_Les_Cloches_de_Geneve_(Nocturne) | [302.0, 401.25) | 49.625 | 2/4 |
| beethoven_piano_sonatas | 23-3 | [314.0, 422.0) | 54.0 | 2/4 |
2 long phrases in 3/4 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| chopin_mazurkas | BI153-1op56-1 | [306.0, 426.0) | 40.0 | 3/4 |
| debussy_suite_bergamasque | l075-02_suite_menuet | [147.0, 312.0) | 55.0 | 3/4 |
2 long phrases in 3/8 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| beethoven_piano_sonatas | 17-3 | [258.75, 321.0) | 41.5 | 3/8 |
| [140.75, 225.75) | 56.666667 | 3/8 |
8 long phrases in 4/4 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| liszt_pelerinage | 160.06_Vallee_dObermann | [165.0, 297.0) | 33.0 | 4/4 |
| mahler_kindertotenlieder | kindertotenlieder_01_nun_will_die_sonn | [144.0, 308.0) | 41.0 | 4/4 |
| beethoven_piano_sonatas | 32-1 | [364.125, 536.125) | 43.0 | 4/4 |
| 21-1 | [444.0, 620.0) | 44.0 | 4/4 | |
| debussy_suite_bergamasque | l075-04_suite_passepied | [152.0, 332.0) | 45.0 | 4/4 |
| liszt_pelerinage | 161.07_Apres_une_lecture_du_Dante | [909.0, 1101.0) | 48.0 | 4/4 |
| 160.06_Vallee_dObermann | [515.5, 711.25) | 48.9375 | 4/4 | |
| mahler_kindertotenlieder | kindertotenlieder_05_in_diesem_wetter | [236.0, 495.0) | 64.75 | 4/4 |
10 long phrases in 6/8 meter:
| duration_measures | timesig | |||
|---|---|---|---|---|
| corpus | fname | phrase_slice | ||
| beethoven_piano_sonatas | 18-4 | [83.5, 190.0) | 35.5 | 6/8 |
| [272.5, 380.5) | 36.0 | 6/8 | ||
| liszt_pelerinage | 162.03_Tarantella_da_Guillaume_Louis_Cottrau._Presto_e_canzone_napolitana | [0.0, 111.0) | 37.0 | 6/8 |
| grieg_lyrical_pieces | op62n05 | [74.5, 186.0) | 37.166667 | 6/8 |
| beethoven_piano_sonatas | 03-4 | [774.5, 891.5) | 39.0 | 6/8 |
| 18-4 | [596.5, 715.0) | 39.5 | 6/8 | |
| liszt_pelerinage | 160.09_Les_Cloches_de_Geneve_(Nocturne) | [0.0, 123.0) | 41.0 | 6/8 |
| beethoven_piano_sonatas | 31-3 | [229.5, 354.0) | 41.5 | 6/8 |
| 18-4 | [380.5, 512.5) | 44.0 | 6/8 | |
| liszt_pelerinage | 162.01_Gondoliera | [243.5, 385.625) | 47.375 | 6/8 |
Local keys¶
# Tuple of the distinct local keys of every phrase, and how many there are.
local_keys_per_phrase = phrase_gpb["localkey"].unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
# Prepend both pieces of information as new columns to the phrase table.
phrases_with_keys = pd.concat(
    [
        n_local_keys_per_phrase.rename('n_local_keys'),
        local_keys_per_phrase.rename('local_keys'),
        phrases,
    ],
    axis=1,
)
phrases_with_keys.head(10).style.apply(color_background, subset=['n_local_keys', 'local_keys'])
| n_local_keys | local_keys | mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | |||||||||||||||||||||||||||||||||||
| bach_solo | BWV1009_01_Prelude | [0.0, 18.25) | 1 | ('I',) | 1 | 1 | 0 | 18.250000 | 0 | 0 | 3/4 | 1 | 1 | C.I{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | { | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan |
| [18.25, 36.25) | 1 | ('I',) | 7 | 7 | 73/4 | 18.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [36.25, 78.0) | 1 | ('I',) | 13 | 13 | 145/4 | 41.750000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [78.0, 108.0) | 1 | ('I',) | 27 | 27 | 78 | 30.000000 | 0 | 0 | 3/4 | 1 | 1 | vi}{ | nan | C | I | nan | vi | vi | nan | nan | nan | nan | nan | }{ | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | nan | ||
| [108.0, 180.25) | 1 | ('I',) | 37 | 37 | 108 | 72.250000 | 0 | 0 | 3/4 | 1 | 1 | I}{ | nan | C | I | nan | I | I | nan | nan | nan | nan | nan | }{ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | ||
| [180.25, 210.25) | 1 | ('I',) | 61 | 61 | 721/4 | 30.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [210.25, 244.25) | 1 | ('I',) | 71 | 71 | 841/4 | 34.000000 | 1/16 | 1/16 | 3/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| [244.25, 264.0) | 1 | ('I',) | 82 | 82 | 977/4 | 19.750000 | 5/16 | 5/16 | 3/4 | 1 | 1 | { | nan | C | I | I | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan | ||||
| BWV1009_02_Allemande | [0.0, 15.75) | 1 | ('I',) | 1 | 0 | 0 | 15.750000 | 0 | 13/16 | 4/4 | 1 | 1 | C.V{ | nan | C | I | nan | V | V | nan | nan | nan | nan | nan | { | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | nan | |
| [15.75, 48.0) | 2 | ('I', 'V') | 5 | 4 | 63/4 | 32.250000 | 3/4 | 3/4 | 4/4 | 1 | 1 | { | nan | C | I | nan | nan | nan | nan | nan | nan | nan | nan | { | nan | False | False | () | () | nan | nan | nan | nan |
Number of unique local keys per phrase¶
# How many phrases contain 1, 2, 3, ... distinct local keys.
count_n_keys = phrases_with_keys["n_local_keys"].value_counts().rename("#phrases").to_frame()
count_n_keys.index.name = "unique keys"
count_n_keys
| #phrases | |
|---|---|
| unique keys | |
| 1 | 5422 |
| 2 | 1143 |
| 3 | 97 |
| 4 | 17 |
| 6 | 7 |
| 5 | 1 |
| 8 | 1 |
The most frequent keys for non-modulating phrases¶
# Phrases that stay within a single local key.
unique_key_selector = phrases_with_keys["n_local_keys"] == 1
phrases_with_unique_key = phrases_with_keys[unique_key_selector].copy()
# Replace the 1-tuples by the key they contain before counting.
phrases_with_unique_key["local_keys"] = phrases_with_unique_key["local_keys"].map(lambda keys: keys[0])
value_count_df(phrases_with_unique_key["local_keys"], counts="#phrases")
| #phrases | |
|---|---|
| local_keys | |
| I | 2450 |
| i | 1579 |
| V | 451 |
| III | 281 |
| v | 172 |
| vi | 125 |
| IV | 56 |
| iii | 55 |
| VI | 53 |
| iv | 39 |
| VII | 27 |
| bVI | 19 |
| bIII | 16 |
| #V | 13 |
| ii | 13 |
| vi/V | 10 |
| #iii | 8 |
| #II | 6 |
| bII | 6 |
| bvi | 5 |
| #VII | 4 |
| iv/iv | 4 |
| bV | 4 |
| #iv | 3 |
| ii/VI | 3 |
| bII/V | 2 |
| bI | 2 |
| bIV | 2 |
| vii | 2 |
| II | 2 |
| bii | 1 |
| #III | 1 |
| bi | 1 |
| IV/IV | 1 |
| V/VII | 1 |
| bVII | 1 |
| #I | 1 |
| #vii | 1 |
| V/V | 1 |
| iii/bVI | 1 |
Most frequent modulations within one phrase¶
# Phrases containing more than one local key.
# NOTE: despite their names, `two_keys_selector` selects two OR MORE keys and
# `phrases_with_unique_key` is rebound here to the modulating phrases.
two_keys_selector = phrases_with_keys["n_local_keys"] > 1
phrases_with_unique_key = phrases_with_keys[two_keys_selector].copy()
value_count_df(phrases_with_unique_key["local_keys"], "modulations")
| counts | |
|---|---|
| modulations | |
| (I, V) | 135 |
| (V, I) | 97 |
| (i, III) | 90 |
| (III, i) | 79 |
| (i, v) | 62 |
| (I, vi) | 59 |
| (vi, I) | 56 |
| (v, i) | 51 |
| (iv, i) | 32 |
| (i, I) | 27 |
| (III, v) | 21 |
| (I, iii) | 19 |
| (VI, i) | 16 |
| (i, iv) | 16 |
| (V, vi) | 16 |
| (I, i) | 15 |
| (IV, I) | 14 |
| (I, IV) | 13 |
| (III, iv) | 13 |
| (v, III) | 12 |
| (bIII, I) | 11 |
| (I, ii) | 11 |
| (iii, I) | 10 |
| (III, i, v) | 9 |
| (i, VI) | 8 |
| (I, bIII) | 8 |
| (III, I) | 7 |
| (I, III) | 7 |
| (V, I, vi) | 7 |
| (v, iv) | 7 |
| (ii, I) | 6 |
| (iv, v) | 6 |
| (I, bVI) | 6 |
| (vi, ii) | 5 |
| (i, ii) | 4 |
| (III, bV, iii, i) | 4 |
| (vi, iii) | 4 |
| (i, bIII) | 4 |
| (i, V) | 4 |
| (v, V) | 4 |
| (vi, V) | 4 |
| (ii, vi) | 4 |
| (VII, i) | 3 |
| (V, i) | 3 |
| (i, iii) | 3 |
| (bVI, I) | 3 |
| (v, I) | 3 |
| (III, i, iv) | 3 |
| (I, iv) | 3 |
| (bV, bVI, bvii) | 3 |
| (ii, iii) | 3 |
| (i, v, iv) | 3 |
| (V, iii) | 3 |
| (iv, III) | 3 |
| (I, #vi) | 3 |
| (I, #II) | 3 |
| (bIII, i) | 3 |
| (bVII, i, ii, iii, IV, I) | 3 |
| (vii, i) | 2 |
| (III, iv, v) | 2 |
| (bIII/bIII/V, I) | 2 |
| (bIII/V, bIII/bIII/V) | 2 |
| (V, bIII/V) | 2 |
| (VII, I) | 2 |
| (I, VII) | 2 |
| (iii, vi) | 2 |
| (iii, bv) | 2 |
| (VI, iv) | 2 |
| (I, bVII) | 2 |
| (#II, I) | 2 |
| (VI, bII) | 2 |
| (i, bI) | 2 |
| (vi, V, iii) | 2 |
| (IV, III, I) | 2 |
| (VI, I) | 2 |
| (i, #iii) | 2 |
| (iv, IV) | 2 |
| (VII, v) | 2 |
| (bVI, iii/bVI) | 2 |
| (V, ii) | 2 |
| (vi, IV) | 2 |
| (bIII, iv) | 2 |
| (vii, I) | 2 |
| (bIII, iv, i) | 2 |
| (v, v/v) | 2 |
| (III, V) | 2 |
| (iv/iv, bII, iv, VI, i, III) | 2 |
| (iv, iv/iv) | 2 |
| (VII, bIII, II) | 2 |
| (bII, i) | 2 |
| (v, IV) | 2 |
| (i, VII) | 2 |
| (I, #V) | 2 |
| (bII, bIII/bII) | 1 |
| (bi, iv, i) | 1 |
| (bv, i, bV, V, bii, VI) | 1 |
| (iii, bVII) | 1 |
| (#iii/ii, ii, bii, iii, iv, I) | 1 |
| (#V, V) | 1 |
| (III, V/III) | 1 |
| (VI, iv, I) | 1 |
| (IV, #vi, VII, iv, vi) | 1 |
| (VII, V, vi) | 1 |
| (V, #III, VII, iii, iv, bi, bii, I) | 1 |
| (vi, ii, #iii/ii) | 1 |
| (I, III, V) | 1 |
| (VII, III) | 1 |
| (#iii, bii/#iii, i) | 1 |
| (V, VII) | 1 |
| (iii, I, i) | 1 |
| (vi, #iv) | 1 |
| (ii, vi, i) | 1 |
| (iii, I, vi) | 1 |
| (#iv, bvi) | 1 |
| (VI, bIII/VI) | 1 |
| (#II, II, V) | 1 |
| (bII, iv, I, #III) | 1 |
| (#III, VII) | 1 |
| (VII, #VII, #IV) | 1 |
| (#IV, #III) | 1 |
| (#III, II) | 1 |
| (II, i) | 1 |
| (iv, VI) | 1 |
| (iv/iii, i) | 1 |
| (iv, bIII, v, I) | 1 |
| (iv, bIII, v, III/v) | 1 |
| (bVII, iii/iii, iii) | 1 |
| (bVII, I, II, III) | 1 |
| (#VII, i) | 1 |
| (I, bII, bIII) | 1 |
| (V, bI) | 1 |
| (bii, i) | 1 |
| (ii/VI, #VI) | 1 |
| (IV, ii, I) | 1 |
| (i, I, II) | 1 |
| (v, i, iv) | 1 |
| (i, iv, III) | 1 |
| (II, v) | 1 |
| (i, iv, VI) | 1 |
| (III, VII) | 1 |
| (v, VII/v) | 1 |
| (VI, bIII/VI, V, i) | 1 |
| (VII/v, v) | 1 |
| (V, bIII) | 1 |
| (bIII/bII, i) | 1 |
| (bv, i) | 1 |
| (i, bv) | 1 |
| (III, bii) | 1 |
| (#VI, I) | 1 |
| (i, III, vii, iv) | 1 |
| (VII, biii) | 1 |
| (V, vi, I) | 1 |
| (bvi, v, bv, I) | 1 |
| (V, biii, I) | 1 |
| (bIII, bVII) | 1 |
| (bIII, bvi, bV) | 1 |
| (bV, bbVII, I) | 1 |
| (I, bI, VI) | 1 |
| (iii, iv/iii) | 1 |
| (I, V, v) | 1 |
| (#iii, v/#iii) | 1 |
| (I, III, i) | 1 |
| (#VI, bV) | 1 |
| (bV, i) | 1 |
| (ii, vi, I) | 1 |
| (#VI, VI) | 1 |
| (VI, ii/VI) | 1 |
| (bIII/VI, VII, i) | 1 |
| (iv, I, V) | 1 |
| (#II, II, I) | 1 |
| (#VII, v) | 1 |
| (bII/V, iv) | 1 |
| (iv, bII) | 1 |
| (ii, v) | 1 |
| (IV, bII, I) | 1 |
| (iii, III) | 1 |
| (v, #iii) | 1 |
| (#iii, v) | 1 |
| (iv, bVI) | 1 |
| (bIII, i, bVI) | 1 |
| (bVI, i) | 1 |
| (iv, IV/IV, IV/IV/IV) | 1 |
| (IV/IV/IV, iv) | 1 |
| (iv, I) | 1 |
| (#iii, i) | 1 |
| (V, I, VI) | 1 |
| (VI, II) | 1 |
| (II, I) | 1 |
| (bIII, I, VI) | 1 |
| (III, VI) | 1 |
| (vi/V, I) | 1 |
| (I, vi/V) | 1 |
| (ii, IV) | 1 |
| (v/v/v, v, V) | 1 |
| (V, vi, ii) | 1 |
| (ii, I, vi) | 1 |
| (v, iv, III, i) | 1 |
| (V, v) | 1 |
| (bVI, iv) | 1 |
| (iv, i, I) | 1 |
| (I, vii) | 1 |
| (v/v, v/v/v) | 1 |
| (V, i, iv, V/V) | 1 |
| (i, vi) | 1 |
| (V/V, v, i) | 1 |
| (I, v) | 1 |
| (v/v, i) | 1 |
| (iii, vi, IV) | 1 |
| (vii, bII) | 1 |
| (bII, iv, i) | 1 |
| (IV, i) | 1 |
| (i, bII) | 1 |
| (III, iii) | 1 |
| (v, VI) | 1 |
| (iv, i, III) | 1 |
| (bVII, i) | 1 |
| (III, v, iv, i) | 1 |
| (vi, i) | 1 |
| (i, III, v) | 1 |
| (v, iv, VI, i) | 1 |
| (v, iv, III) | 1 |
| (i, VII, III) | 1 |
| (III, iv, v, i) | 1 |
| (I, IV, III) | 1 |
| (I, VI) | 1 |
| (bVII, IV) | 1 |
| (VI, vi) | 1 |
| (I, bVII, bVI) | 1 |
| (iii/i, i) | 1 |
| (i, #VI, III/#VI) | 1 |
| (iv, bIII) | 1 |
| (biii, bII, I) | 1 |
| (V, V/V) | 1 |
| (iii/V/VII, I) | 1 |
| (III, iv, VI) | 1 |
| (V, I, IV, vi) | 1 |
| (v, iv, i) | 1 |
| (ii, V, I) | 1 |
| (VI, VII) | 1 |
| (bVI, III) | 1 |
| (bII, vii, VI) | 1 |
| (vii, II) | 1 |
| (vii, iii) | 1 |
| (iii, vii, I) | 1 |
| (bIV, i) | 1 |
| (bIV, I) | 1 |
| (I, bIV) | 1 |
| (bII, I) | 1 |
| (vi, I, ii) | 1 |
| (IV, V) | 1 |
| (vi, I, v) | 1 |
| (v, vi, I) | 1 |
| (v, i, III) | 1 |
| (v, III, i) | 1 |
| (i, v, III) | 1 |
| (III, v, i) | 1 |
| (V, V/V, I) | 1 |
| (iii/bVI, I) | 1 |
Cadences¶
Overall¶
PAC: Perfect Authentic Cadence
IAC: Imperfect Authentic Cadence
HC: Half Cadence
DC: Deceptive Cadence
EC: Evaded Cadence
PC: Plagal Cadence
# Inject a "Show Code"/"Hide Code" button into the rendered notebook: the
# script hides all `div.input` elements once the document is ready, and
# submitting the form calls `code_toggle()` to show or hide them again.
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Report the number of non-null entries in the cadence column, then tabulate them by value.
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
value_count_df(all_labels.cadence)
5805 cadence labels.
| counts | |
|---|---|
| cadence | |
| PAC | 2809 |
| HC | 1599 |
| IAC | 1085 |
| EC | 137 |
| DC | 103 |
| PC | 72 |
# Pie chart of cadence types over all labels that carry a cadence.
has_cadence = all_labels["cadence"].notna()
px.pie(
    all_labels[has_cadence],
    names="cadence",
    color="cadence",
    color_discrete_map=cadence_colors,
)
Per dataset¶
# Absolute number of each cadence type per corpus ...
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
# ... and the same counts as a fraction of all cadences in that corpus.
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# Stacked bars per corpus. `category_orders` has to be keyed by the plotted
# column name ('corpus'); a key that matches no column is silently ignored.
# NOTE: the column called 'count' holds the fractions computed above.
px.bar(
    cadence_fraction_per_dataset.rename('count').reset_index(),
    x='corpus',
    y='count',
    color='cadence',
    color_discrete_map=cadence_colors,
    category_orders=dict(corpus=chronological_order),
)
# One pie chart of cadence counts per corpus, laid out four to a row.
fig = px.pie(
    cadence_count_per_dataset.rename('count').reset_index(),
    names='cadence',
    color='cadence',
    values='count',
    facet_col='corpus',
    facet_col_wrap=4,
    height=2000,
    color_discrete_map=cadence_colors,
)
# Keep only the part after the last "=" of every annotation text.
fig.for_each_annotation(
    lambda annotation: annotation.update(text=annotation.text.split("=")[-1])
)
fig.update_layout(**STD_LAYOUT)
Per phrase¶
Number of cadences per phrase¶
# Per phrase: the number of distinct cadence labels and the labels themselves
# as a tuple without null values, prepended to the phrase/key table.
n_cadences_per_phrase = phrase_gpb["cadence"].nunique().rename('n_cadences')
cadence_types_per_phrase = (
    phrase_gpb["cadence"]
    .unique()
    .rename('cadences')
    .map(lambda labels: tuple(label for label in labels if not pd.isnull(label)))
)
phrases_with_cadences = pd.concat(
    [n_cadences_per_phrase, cadence_types_per_phrase, phrases_with_keys],
    axis=1,
)
value_count_df(phrases_with_cadences["n_cadences"], counts="#phrases")
| #phrases | |
|---|---|
| n_cadences | |
| 1 | 5344 |
| 0 | 1174 |
| 2 | 166 |
| 3 | 4 |
# For every corpus, count the phrases having 0, 1, 2, ... distinct cadences.
n_cad = (
    phrases_with_cadences.groupby(level='corpus')
    .n_cadences.value_counts()
    .rename('counts')
    .reset_index()
    .sort_values('n_cadences')
)
# Convert to strings after the numeric sort so that plotly express treats
# the number of cadences as a discrete color category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# `category_orders` has to be keyed by the plotted column name ('corpus');
# a key that matches no column is silently ignored.
fig = px.bar(
    n_cad,
    x='corpus',
    y='counts',
    color='n_cadences',
    height=800,
    barmode='group',
    labels=dict(n_cadences="#cadences in a phrase"),
    category_orders=dict(corpus=chronological_order),
)
fig.show()
Combinations of cadence types for phrases with more than one cadence¶
# Tabulate the cadence combinations of phrases with more than one distinct cadence.
has_several_cadences = phrases_with_cadences["n_cadences"] > 1
value_count_df(phrases_with_cadences[has_several_cadences].cadences)
| counts | |
|---|---|
| cadences | |
| (DC, PAC) | 40 |
| (HC, PAC) | 38 |
| (EC, PAC) | 35 |
| (IAC, PAC) | 12 |
| (EC, HC) | 11 |
| (PAC, HC) | 6 |
| (DC, HC) | 4 |
| (EC, IAC) | 3 |
| (DC, IAC) | 3 |
| (PAC, IAC) | 3 |
| (PC, PAC) | 3 |
| (PAC, DC) | 2 |
| (HC, PC) | 1 |
| (IAC, HC, PAC) | 1 |
| (EC, PC) | 1 |
| (HC, DC, PAC) | 1 |
| (PAC, PC) | 1 |
| (IAC, HC) | 1 |
| (EC, DC, PAC) | 1 |
| (HC, DC) | 1 |
| (IAC, EC) | 1 |
| (DC, EC, IAC) | 1 |
Positioning of cadences within phrases¶
# Collect one point for the end of every phrase and one per cadence label.
# Phrases containing at least one cadence are stacked vertically, ordered by
# duration; x values are offsets in quarter notes from the phrase start.
df_rows = []
y_position = 0
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        interval = ix[2]  # third index level: the phrase's interval
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # Shift the onsets on a fresh frame rather than assigning into the
        # `.loc` selection, which pandas may flag with a SettingWithCopyWarning.
        cadences = cadences.assign(quarterbeats=cadences.quarterbeats - start_pos)
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        y_position += 1
    # Disabled: placeholder rows for phrases without cadence.
    # else:
    #     df_rows.append((y_position, pd.NA, pd.NA, description))
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(
    data[data.x.notna()],
    x='x',
    y="phrase_ix",
    color="marker",
    hover_name="description",
    height=3000,
    labels=dict(marker='legend'),
    color_discrete_map=cadence_colors,
)
fig.update_traces(marker_size=5)
fig.update_yaxes(autorange="reversed")
fig.show()
Cadence ultima¶
# Emit an HTML/JavaScript snippet that hides all 'div.input' elements once the
# document is ready and adds a button toggling them between shown and hidden.
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Fetch the "expanded" facet of the segmented data; later cells read it as a global.
phrase_segments = segmented.get_facet("expanded")
# Rows that carry a cadence label ...
cadence_selector = phrase_segments.cadence.notna()
# ... rows that have no chord value ...
missing_chord_selector = phrase_segments.chord.isna()
# ... and rows where both hold: a cadence label without a chord.
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
#print(f"Ultima missing for {cadence_with_missing_chord_selector.sum()} cadences.")
# NOTE(review): `missing` is not used again in the visible part of the notebook.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Run ms3's label expansion on exactly those rows (without propagation, with chord
# tones, skipping checks) and write the result back into the same rows.
# Presumably this fills in the harmony features of cadence rows -- TODO confirm.
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Report how many cadence-labelled rows still have no bass note after the expansion.
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
MC 49: #vii in major context corrected to vii.
Ultima harmony missing for 29 cadence labels.
Ultimae as Roman numeral¶
def highlight(row, color="#ffffb3"):
    """Return per-cell CSS for one row, highlighting rows with at least 10 counts.

    Intended as a row-wise styling callback (see the commented-out
    ``style.apply(highlight, axis=1)`` calls in this notebook).

    Parameters
    ----------
    row
        A row exposing a ``counts`` attribute and supporting ``len()``.
    color
        CSS color used as background for highlighted rows.

    Returns
    -------
    list
        One entry per cell of ``row``: ``None`` for every cell if
        ``row.counts < 10``, otherwise a ``background-color`` declaration.
    """
    n_cells = len(row)
    if row.counts < 10:
        return [None] * n_cells
    # Honour the ``color`` argument instead of a hard-coded value.
    return [f"background-color: {color};"] * n_cells
# Frequency of each cadence label; its index determines the facet-row order below.
cadence_counts = all_labels.cadence.value_counts()
# Count the numerals per (mode, cadence type) combination, as a flat table.
ultima_root = (
    phrase_segments
    .groupby(['localkey_is_minor', 'cadence'])
    .numeral
    .value_counts()
    .reset_index(name='counts')
)
# Replace the boolean mode flag with readable facet titles.
ultima_root['localkey_is_minor'] = ultima_root['localkey_is_minor'].map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_root,
    names='numeral',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Keep only the part after "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)
Ultimae bass note as scale degree¶
# Count the bass notes per (mode, cadence type) combination, as a flat table.
ultima_bass = (
    phrase_segments
    .groupby(['localkey_is_minor', 'cadence'])
    .bass_note
    .value_counts()
    .reset_index(name='counts')
)
# Convert the bass notes to scale degrees, passing the mode column as `minor`.
ultima_bass['bass_note'] = ms3.transform(
    ultima_bass,
    ms3.fifths2sd,
    dict(fifths='bass_note', minor='localkey_is_minor'),
)
# Replace the boolean mode flag with readable facet titles.
ultima_bass['localkey_is_minor'] = ultima_bass['localkey_is_minor'].map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_bass,
    names='bass_note',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Keep only the part after "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
Chord progressions¶
PACs with ultima I/i¶
#pac_on_i = phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral.isin(('I', 'i')))).any() else None)
#pac_on_i.cadence.value_counts()
#pac_on_i.droplevel(-1).index.nunique()
def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True, segments=None):
    """Collect the ``feature`` values leading up to every cadence of type ``selected``.

    Rows with a non-null ``feature`` are grouped by the first three index
    levels. Within each group, rows after the last cadence label are dropped
    and the rest is split into runs that each end on a cadence label. A run
    is kept if its final row has cadence ``selected`` and satisfies every
    constraint in ``last_row``.

    Parameters
    ----------
    selected
        Cadence label a run's final row must carry.
    last_row
        Optional mapping ``column -> allowed value`` or ``column -> tuple of
        allowed values`` that the final row of a run must satisfy.
    feature
        Column whose values make up the returned progressions.
    dataset
        If given, groups are skipped unless ``dataset in <first index value>``.
    as_series
        Return a ``pd.Series`` of tuples (default) instead of a list.
    segments
        DataFrame to analyse. Defaults to the notebook-level variable
        ``phrase_segments``.

    Returns
    -------
    pd.Series or list
        One tuple of ``feature`` values per matching cadence.
    """
    if segments is None:
        # Backward-compatible default: use the notebook-level DataFrame.
        segments = phrase_segments
    # Normalise every constraint to a tuple of allowed values.
    required = {} if last_row is None else {
        k: v if isinstance(v, tuple) else (v,) for k, v in last_row.items()
    }
    progressions = []
    labelled = segments[segments[feature].notna()]
    for (corp, *_), df in labelled.groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: a new group starts on
        # the row following each label (fill_value keeps the mask boolean)
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, cadence in df.groupby(cadence_groups):
            last_r = cadence.iloc[-1]
            if last_r.cadence != selected:
                continue
            if any(last_r[feat] not in values for feat, values in required.items()):
                continue
            progressions.append(tuple(cadence[feature]))
    if as_series:
        return pd.Series(progressions, dtype=object)
    return progressions
# Chord progressions leading up to PACs whose final row has numeral I or i.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
# Tabulate how often each distinct progression occurs.
value_count_df(chord_progressions, "chord progressions")
Progressions for 2774 cadences:
| counts | |
|---|---|
| chord progressions | |
| (V, V7, I, ii6(2), ii6, V7, I) | 12 |
| (I, V7, I) | 12 |
| (I, I6, IV, V(64), V, I) | 10 |
| (I, IV, V(64), V, I) | 9 |
| (i, VM7, i, V7, V7(#2), V7, i) | 8 |
| ... | ... |
| (i, iv, V, iv6, V, VI, i, iv, i, iv, iv6, V, i6, i, V, iv6, V, iv, iv7, V, i) | 1 |
| (i6, ii%65, V(4), V, i) | 1 |
| (i, V6, i, iv6(2), iv6, iio6, V, III, VII(4), VII, #viio, i(9), i, i6, ii%65, V(4), V, i) | 1 |
| (i, V6, V65, I, vi6, V6(2), V6, iii6, IV6(2), IV6, ii6, V7, vi, IVM7, V(4), V, I) | 1 |
| (I, V7/IV, IV, ii7, V, iii7, vi, IVM7, V, ii, V, V2, I6, vi%43, V43, I, V(4), V, I) | 1 |
2320 rows × 1 columns
# Same selection as for the chord progressions, but collecting the 'numeral' column.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
| counts | |
|---|---|
| numeral progressions | |
| (I, V, V, I) | 21 |
| (I, IV, V, V, I) | 18 |
| (I, V, I) | 16 |
| (I, V, I, V, I, V, I, V, I) | 13 |
| (I, I, IV, V, V, I) | 13 |
| ... | ... |
| (I, ii, I, I, ii, V, V, I) | 1 |
| (i, V, iv, ii, V, i, V, V, i) | 1 |
| (v, i, iv, V, III, VI, ii, V, i, iv, V, i, iv, ii, V, i, V, V, i) | 1 |
| (I, IV, I, V, V, I) | 1 |
| (I, #vii, i, V, i, v, iv, iv, ii, V, VI, ii, V, V, i) | 1 |
2141 rows × 1 columns
def remove_immediate_duplicates(l):
    """Return the elements of ``l`` as a tuple with immediate repetitions removed.

    Every element is compared with its direct predecessor and kept if they
    differ; the first element is always kept. Accepts any iterable.
    """
    # A private sentinel marks "no predecessor", so that a leading ``None``
    # in the data is not mistaken for the start marker and dropped.
    no_predecessor = object()
    previous = no_predecessor
    deduplicated = []
    for item in l:
        if previous is no_predecessor or item != previous:
            deduplicated.append(item)
        previous = item
    return tuple(deduplicated)
# Collapse immediate repetitions of the same numeral within each progression, then tabulate.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
| counts | |
|---|---|
| (I, V, I) | 56 |
| (I, IV, V, I) | 49 |
| (I, V, I, V, I) | 32 |
| (I, V, I, ii, V, I) | 24 |
| (i, V, i, V, i) | 23 |
| ... | ... |
| (I, vii, vi, #vii, i) | 1 |
| (V, iii, IV, ii, V, I, IV, V, I, ii, V, I) | 1 |
| (I, IV, V, I, vi, V, IV, iii, I, ii, V, I) | 1 |
| (i, V, IV, V, VI, ii, V, i) | 1 |
| (i, V, i, V, i, v, vi, iv, V, i, ii, V, i) | 1 |
1847 rows × 1 columns
PACs ending on scale degree 1¶
Scale degrees expressed w.r.t. major scale, regardless of actual key.
# Bass-note progressions leading up to PACs whose final row has bass_note == 0.
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# Convert each progression to scale degrees; no `minor` argument is passed to ms3.fifths2sd here.
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 2580 cadences:
| counts | |
|---|---|
| bass progressions | |
| (1, 4, 5, 5, 1) | 23 |
| (1, 5, 1) | 23 |
| (1, 3, 4, 5, 5, 1) | 17 |
| (1, 5, 5, 1) | 15 |
| (5, 5, 1, 4, 4, 5, 1) | 12 |
| ... | ... |
| (1, 3, 4, 4, 4, b3, 5, 5, 1) | 1 |
| (5, 1, b6, 7, 5, b6, 4, 5, 1, 4, #7, 1, 4, 4, 4, b3, 5, 5, 1) | 1 |
| (1, 4, 1, 5, 5, 1) | 1 |
| (1, 6, 2, 5, 1, 4, 1, 5, 5, 1) | 1 |
| (5, 4, 1, 1, 2, 2, 3, 3, 4, 5, 1, 3, 4, 4, 5, 6, 3, 4, 5, 1) | 1 |
1988 rows × 1 columns
# Collapse immediate repetitions of the same scale degree within each progression, then tabulate.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
| counts | |
|---|---|
| (1, 5, 1) | 50 |
| (1, 5, 1, 5, 1) | 38 |
| (1, 4, 5, 1) | 35 |
| (1, 3, 4, 5, 1) | 25 |
| (5, 1, 4, 5, 1) | 19 |
| ... | ... |
| (1, #7, 1, 3, 4, 5, 1, 5, 4, 5, 1) | 1 |
| (1, b3, 1, 2, 5, 1) | 1 |
| (1, 6, #7, 1, #7, 6, 2, 5, 1, 4, #4, 5, 1) | 1 |
| (1, #7, 1, 3, 4, 5, b3, #7, 1, #7, 1) | 1 |
| (1, 4, 6, 5, 1, 3, 4, 5, 3, 4, 6, 3, 4, 5, 1) | 1 |
1772 rows × 1 columns
def make_sankey(data, labels, node_pos=None, margin=None, pad=20, color='auto', **kwargs):
    """Create a plotly Sankey figure from an edge table and node labels.

    Parameters
    ----------
    data
        Object exposing ``source``, ``target`` and ``value`` attributes that
        describe the links.
    labels
        One label per node; the list position is the node ID.
    node_pos
        Optional container mapping node IDs to ``(x, y)`` positions. Nodes
        not contained in it are placed at 0.
    margin
        Layout margins; defaults to 10 on every side.
    pad
        Passed on as the node padding.
    color
        ``'auto'`` assigns one hue per distinct label; any other value is
        passed on unchanged as the node color.
    **kwargs
        Forwarded to ``fig.update_layout``.

    Returns
    -------
    The created ``go.Figure``.
    """
    if margin is None:
        # Created per call instead of sharing one mutable default dict.
        margin = {'l': 10, 'r': 10, 'b': 10, 't': 10}
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys keeps first-occurrence order, so identical input
        # always yields identical colors (set order may differ between runs).
        unique_labels = list(dict.fromkeys(labels))
        # Guard against division by zero when there are no labels at all.
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {label: f'hsv({round(i*color_step)}%,100%,100%)' for i, label in enumerate(unique_labels)}
        color = [unique_colors[label] for label in labels]
    if node_pos is None:
        node_x = node_y = None
    else:
        node_ids = range(len(labels))
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in node_ids]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in node_ids]
    fig = go.Figure(go.Sankey(
        arrangement='snap',
        node=dict(
            pad=pad,
            #thickness = 20,
            #line = dict(color = "black", width = 0.5),
            label=labels,
            x=node_x,
            y=node_y,
            color=color,
        ),
        link=dict(
            source=data.source,
            target=data.target,
            value=data.value,
        ),
    ))
    fig.update_layout(margin=margin, **kwargs)
    return fig
def progressions2graph_data(progressions, cut_at_stage=None):
    """Turn progressions into staged nodes and weighted edges.

    Each progression is read backwards: its last element is stage 0, the
    one before it stage 1, and so on. Every distinct (stage, element) pair
    becomes one node with a consecutive integer ID.

    Parameters
    ----------
    progressions
        Iterable of reversible sequences of hashable elements.
    cut_at_stage
        If not None, stages greater than this value are ignored.

    Returns
    -------
    stage_nodes : defaultdict
        ``{stage: {element: node_id}}``
    edge_weights : Counter
        ``{(node_id, following_node_id): count}``, where the first node is
        the one occurring earlier in the progression.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            # Compare against None so that cut_at_stage=0 is honoured.
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes = stage_nodes[stage]
            if current not in nodes:
                nodes[current] = node_counter
                node_counter += 1
            current_node = nodes[current]
            if previous_node is not None:
                # Edges point in musical order: from this (earlier) element
                # to the one that follows it in the progression.
                edge_weights[(current_node, previous_node)] += 1
            previous_node = current_node
    return stage_nodes, edge_weights
def graph_data2sankey(stage_nodes, edge_weights):
    """Build a Sankey figure from staged nodes and weighted edges.

    ``stage_nodes`` maps stage -> {label: node_id} and ``edge_weights`` maps
    (source_id, target_id) -> weight. Node IDs are expected to be the
    consecutive integers 0..n-1, since labels are looked up in that order.
    """
    edge_rows = [(source, target, weight) for (source, target), weight in edge_weights.items()]
    data = pd.DataFrame(edge_rows, columns=['source', 'target', 'value'])
    # Invert the per-stage {label: node_id} mappings into one {node_id: label}.
    node2label = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            node2label[node] = label
    labels = [node2label[node] for node in range(len(node2label))]
    return make_sankey(data, labels)
def plot_progressions(progressions, cut_at_stage=None):
    """Convert progressions to graph data and return the Sankey figure built from it.

    ``cut_at_stage`` is passed on unchanged to ``progressions2graph_data``.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)
# Sankey diagram of the de-duplicated numeral progressions, limited to stages 0-3 counted from the cadence.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)
# PACs whose final row has numeral 'i' and localkey_is_minor == True.
# NOTE(review): despite the variable name, the collected feature is the 'root' column, not 'chord'.
chord_progressions_minor = get_progressions('PAC', dict(numeral='i', localkey_is_minor=True), 'root')
chord_progressions_minor
0 (1, 0, 0, 0, 0, 2, 1, 0, -4, -1, 1, 0, -1, 5, ...
1 (0, -2, -3, -1, 5, 1, 1, 0, -2, -4, 1, -1, -3,...
2 (0, 5, 5, 0, 0, 2, -1, 1, 1, 1, 5, 0, 2, 1, 0)
3 (1, 0, 1, 1, 1, 1, -1, 0, 1, 0)
4 (0, 1, 0, 5, 0)
...
1001 (1, 1, 1, 0, 5, 0, 2, 2, -1, -1, 5, 1, 0, 2, 1...
1002 (-4, 6, 1, 1, 0)
1003 (1, 1, 5, 0, 2, 1, 1, 4, -1, -1, 2, 5, 0, -1, ...
1004 (1, 1, 1, 0, -1, -1, -1, -2, -2, -3, -3, -4, -...
1005 (0, -1, 1, 1, 0)
Length: 1006, dtype: object
# Chord progressions of PACs whose final row has numeral 'I' and localkey_is_minor == False.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
plot_progressions(pac_major, cut_at_stage=4)
# Chord progressions of cadences labelled 'DC' whose final row has localkey_is_minor == False.
deceptive = get_progressions('DC', dict(localkey_is_minor=False), 'chord')
deceptive.value_counts()
(I, V43(4), I6, IV, V7, vi) 3
(i, viio43/V, V6, i, viio43/V, V6, I, IV6, I6, IV, viio6, I6, IV6, viio, I, ii65, V, vi) 2
(I, IV, V/vi, vi, V, I, IV, V(64), V7, vi) 2
(I, V, ii7(9), V, V, ii7(9), V, V7, ii7(9), V, ii7, V, ii7(13), ii7, V7(#2), V7(6), vi) 2
(i, iv/i, V(64)/i, V/i, I/i) 2
(I, IV, V(64), V, i) 2
(V43, I(4), V65, I, V2(6), V2, I6, #viio7/ii, iv64/ii, #viio65/ii, i6(6)/ii, #viio43/ii, i6/ii, vi, vii%2, vi, ii%43, I64, I6, V7/V, V7, V6/vi, vi) 2
(I, V, I, V7, I, V6, I, viio/V, V, vi) 2
(IV6, V6, I(4), I, I6, IV, ii6, V7(4), V7, vi(^2), vi) 2
(I, I6, IV, V(64), V, vi) 1
(vi, I6, ii6, V, vi) 1
(I, V6, vi, I6, ii6, V7, vi) 1
(V65, I, V7, I, V65, I, V7, bVI) 1
(I6, IV, V7, vi) 1
(V(4), V, V7, V64, V7, vi, I6, IV, ii6, V7, vi) 1
(V65, I, V7, I6, V65, I, V7, bVI) 1
(V7, vi) 1
(i, V, i, V, i, V, VI/i, iv, V, VI/i, iv, V, VI/i, iv, V(64)/i, V, VI/i) 1
(I(4), I, V(4), V, IV6(112), IV6, V65(2), V6, I(9), I, ii7, viio6, I6, viio6, I, V65, I(9), I, V(4), V7, V(64), V43, I(4), I, V(4), V, V65, i(4)) 1
(I, V6(4), V43/vi, vi, I6, ii6, V, V6/vi, vi, I6, ii6, V, V6/vi, vi) 1
(I, I6, ii6, V, vi) 1
(I, V, I, IV6, ii, V7, bVI) 1
(I, V7, I, V7, I, #viio7/ii, ii, V(64), V7, iv6) 1
(i6/iv, iv/iv, iio6/iv, V(64)/iv, V7/iv, bII) 1
(ii, V(64)/vi, It6/vi, V/vi, V7/V, V, V7/IV, IV, ii, V7(^9), V7, vi) 1
(#viio43/ii, ii, IV, V(64), V7, I) 1
(V(974), V, I, IV, V/vi, vi, I, IV, V(64), V7, vi) 1
(I64, IV6, #viio2/vi, vi, vii%2, I, IV, V(64), V7, vi) 1
(I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, viio/V, V, V(64), V7, V(64), V7, I, I6, ii6, V6/V, V(64), V(4), V7, I, I6, ii6, V6/V, V(64), V(4), V, bVI) 1
(V, V7, IV(+2), V7, vi) 1
(I, V2, I, V2, I, V2, I, V2, I, V2, I6, I, viio6, viio, vi6, vi, V6, V, IV6, IV, iii6, iii, ii6, ii, I6, I, V6, IV6, iii6, ii6, I6, viio6, I, V, ii, vi, IV, I, V(4), V, I, V, ii, vi, I/IV, V2/IV, I/IV, V2/IV, I/IV, V7, V(64), V, V7, I, V6, IV6, iii6, ii6, I6, viio6, I, V6, IV6, iii6, ii6, I6, V7, bVI) 1
(vi, V65/IV, IV, V7, vi, ii6, V(64), V7, vi(64), vi) 1
(V(64)/vi, viio65/V/vi, V/vi, iii, V43, I, V6, V7, #viio43/ii, V7, bVI) 1
(V, V2(9), iii, V43, I, #viio65/vii, V43, #viio2/iii, V7/IV, V+7/IV, IV, vii%43, V7/vi, V7(4+2)/vi, V7/vi, vi, IV(94), IV, I6, V43, I, IV(94), IV, I64, V(4), V, v, ii) 1
(I, ii2, I, I6, ii65, V(64), V, vi) 1
(I, I, V, I, vi, V/vi, vi, IV, V/IV, IV, ii, V/ii, ii, V, vi7) 1
(I, V2, I6, IV, V6/V, V, V6/vi, i/vi, V(64)/vi, V/vi, VI/vi) 1
(V2/V, V6, V2/IV, IV6, vi, ii, V7(^9), V7, I/bVI) 1
(I, V7, I, I6, viio6, vi6, V6, V7, I, IV64, I, V, V65, I, V, V65, I, ii6, V(64), V7, vi(64), vi) 1
dtype: int64
# Sankey diagrams: 'DC' chord progressions (stages 0-4) and de-duplicated bass progressions (stages 0-7).
plot_progressions(deceptive, cut_at_stage=4)
plot_progressions(bass_prog_no_dups, cut_at_stage=7)
def remove_sd_accidentals(t):
    """Return a tuple holding only the last item (character) of each element of ``t``.

    For scale degrees such as ``'b3'`` or ``'#7'`` this keeps just the digit.
    """
    return tuple(scale_degree[-1] for scale_degree in t)
# Strip accidentals first and only then collapse immediate repetitions, so that e.g. 'b3' followed by '3' merges.
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)
HCs ending on V¶
# Bass-note progressions of cadences labelled 'HC' whose final row has numeral 'V', converted via ms3.fifths2sd.
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
# Sankey diagram of the de-duplicated progressions, limited to stages 0-5 counted from the cadence.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 1541 cadences: